import time
import requests
from bs4 import BeautifulSoup
import uuid
import schedule

from utils.HBaseConnect import HBaseConnect
from utils.MysqlConnect import MysqlConnect

url = 'https://www.zhihu.com/hot'
tableName = "hot_search_keywords"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 '
                  'Safari/537.36',
    'Cookie': 'd_c0="AHBew4eAFRSPThE47QZg2PZvss-BjI1-0Gk=|1637820363"; _zap=bd5bbc06-296b-4c07-bec6-6f97f67f77c7; '
              'z_c0=2|1:0|10:1715769397|4:z_c0|80'
              ':MS4xRjlHMUxBQUFBQUFtQUFBQVlBSlZUVW9RSjJkTHZBcG1RQ3ZFSVFON2Z4UGhmNUdMR2JHaWhnPT0'
              '=|f1b6fa10c7b618ed141869d5e97f8a55a0916d1387a385bee09db3dd1ec9d616; '
              '_xsrf=fca4c666-68f7-43cf-81b3-6e67025efae5; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1715861278,'
              '1715907334,1716126445,1717386886; '
              '__zse_ck=001_HA1E4UZLCx20o9P+1b6lET=W6g0x8tWITRTf9ts2C9KlBFUs91eoJ9eu7xX5q1b9DA3rk4ppO7d8ZRw6pjPD=P'
              '=AT5MhfSSQUuZos6qBDIE1CRzKuXrppvuThsgUOXWi; SESSIONID=rnt1lUq2X3NyIMUGCq4ty2CUT28oMfg8DjWn9oVeqT5; '
              'JOID=UVAXAEvaOim9PfoWGNoSuFGtmzsNo15cj1KkbSuBAm3Fd7RIVfYOdts1-x0aE0LjrKhJT7eGUSEwgChxnXZgOUc=; '
              'osd=VVEWBUneOyi4P_4XGd8QvFCsnjkJol9ZjValbC6DBmzEcrZMVPcLdN80-hgYF0PiqapNTraDUyUxgS1zmXdhPEU=; '
              'q_c1=266d95d600a04d8796887347cdaaf0db|1717394511000|1717394511000; '
              'BEC=e501eab8fa0f1e4eedff8088b3614a75; tst=h; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1717394573; '
              'KLBRSID=4efa8d1879cb42f8c5b48fe9f8d37c16|1717395331|1717393325'
}


def save_hot_search_keywords(hbase, mysql, dic):
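    """Persist one hot-search entry to both HBase (row key = random UUID) and MySQL."""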
    print("排名" + dic['rank'], dic)
    hbase.putTable(tableName, rowKey=str(uuid.uuid1().hex), data={
        "info:keyword": dic['keyword'],
        "info:num": dic['searchNum'],
        "info:rank": dic['rank'],
        "info:url": dic['url'],
        "info:platform": "知乎",
        "info:time": dic['time'],
    })
    insert_sql = (
        "INSERT INTO information(hot_keyword, search_num, ranking, url, platform, time) "
        "VALUES(%s, %s, %s, %s, %s, %s)")
    insert_args = (dic['keyword'], dic['searchNum'], dic['rank'], dic['url'], "知乎", dic['time'])
    mysql.insert(insert_sql, insert_args)
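
# The INSERT above assumes a MySQL table shaped roughly like the sketch below.
# This is only an illustration inferred from the column list in insert_sql; the
# actual schema (primary key, column types) lives wherever utils.MysqlConnect
# points and may differ.
#
#   CREATE TABLE information (
#       id          INT AUTO_INCREMENT PRIMARY KEY,
#       hot_keyword VARCHAR(255),
#       search_num  VARCHAR(64),
#       ranking     VARCHAR(16),
#       url         VARCHAR(512),
#       platform    VARCHAR(32),
#       time        DATETIME
#   );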


def get_hot_search_keywords():
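    """Fetch the Zhihu hot list once and store every entry via save_hot_search_keywords."""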
    try:
        # Initialize the HBase connection
        hbase = HBaseConnect()
        hbase.start()
        mysql = MysqlConnect()
        current_time = time.strftime('%Y-%m-%d %H:%M:%S')
        # Time out so a hung request cannot stall the scheduler loop
        res = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(res.text, 'html.parser')
        items = soup.find_all('section', {'class': 'HotItem'})
        rank = 1
        dic = {"time": current_time}
        for item in items:
            dic['keyword'] = item.find('h2', {'class': 'HotItem-title'}).text
            # The metric text typically looks like "123 万热度"; keep the leading number
            # and append four zeros to convert 万 (10,000) into an absolute count
            cleaned_string = item.find('div', {'class': 'HotItem-metrics'}).text.split(' ')[0]
            dic['searchNum'] = cleaned_string + '0000'
            dic['url'] = item.find('a', {'rel': 'noopener noreferrer'}).get('href')
            dic['rank'] = str(rank)
            save_hot_search_keywords(hbase, mysql, dic)
            rank += 1
        hbase.stop()
        # Close the MySQL cursor and connection
        mysql.cur.close()
        mysql.connection.close()
    except Exception as e:
        print("Exception", e)


def run_task():
    print("Current time", time.strftime('%Y-%m-%d %H:%M:%S'))
    get_hot_search_keywords()


if __name__ == '__main__':
    # Run the task at minutes 00, 10, 20, 30, 40 and 50 of every hour
    schedule.every().hour.at(":00").do(run_task)
    schedule.every().hour.at(":10").do(run_task)
    schedule.every().hour.at(":20").do(run_task)
    schedule.every().hour.at(":30").do(run_task)
    schedule.every().hour.at(":40").do(run_task)
    schedule.every().hour.at(":50").do(run_task)
    while True:
        schedule.run_pending()
        time.sleep(1)
